All group randomization
pairwise_div_difference <- function(df){
df_div <- enframe(vegan::diversity(table(df$criteria, df$diagnosis)))
df_diff <- data.frame(t(combn(unique(sort(df$criteria)),2))) %>%
left_join(df_div, by = c("X1"="name")) %>%
left_join(df_div, by = c("X2"="name")) %>%
unite("pair", X1, X2, sep = ".") %>%
mutate(entropy_difference = value.x - value.y) %>%
select(-contains("value"))
deframe(df_diff)
}
# pairwise_div_difference(df)
enframe(pairwise_div_difference(df), name = "pair", value = "entropy_difference")
pairwise_div_iteration <- function(){
df %>%
count(criteria) %>%
mutate(diagnosis = map(n, function(x) sample(df$diagnosis, x, replace = T))) %>%
select(-n) %>%
unnest(diagnosis) %>%
pairwise_div_difference(.)
}
pairwise_div_iteration()
aha_kawasaki.eular_acr_sle aha_kawasaki.mcas_1 aha_kawasaki.mcas_2
-0.008694848 0.008252266 0.012165368
aha_kawasaki.migraine aha_kawasaki.slicc_sle eular_acr_sle.mcas_1
0.002825259 -0.001106212 0.016947114
eular_acr_sle.mcas_2 eular_acr_sle.migraine eular_acr_sle.slicc_sle
0.020860217 0.011520107 0.007588636
mcas_1.mcas_2 mcas_1.migraine mcas_1.slicc_sle
0.003913103 -0.005427007 -0.009358478
mcas_2.migraine mcas_2.slicc_sle migraine.slicc_sle
-0.009340110 -0.013271581 -0.003931471
set.seed(1234)
pairs_permutation_distributions <- data.frame(t(replicate(10000, pairwise_div_iteration())))
[WARNING] Deprecated: --self-contained. use --embed-resources --standalone
sapply(combn(unique(sort(df$criteria)),2, simplify = F), paste, collapse = ".")
[1] "aha_kawasaki.eular_acr_sle" "aha_kawasaki.mcas_1" "aha_kawasaki.mcas_2"
[4] "aha_kawasaki.migraine" "aha_kawasaki.slicc_sle" "eular_acr_sle.mcas_1"
[7] "eular_acr_sle.mcas_2" "eular_acr_sle.migraine" "eular_acr_sle.slicc_sle"
[10] "mcas_1.mcas_2" "mcas_1.migraine" "mcas_1.slicc_sle"
[13] "mcas_2.migraine" "mcas_2.slicc_sle" "migraine.slicc_sle"
names(pairs_permutation_distributions)
[1] "aha_kawasaki.eular_acr_sle" "aha_kawasaki.mcas_1" "aha_kawasaki.mcas_2"
[4] "aha_kawasaki.migraine" "aha_kawasaki.slicc_sle" "eular_acr_sle.mcas_1"
[7] "eular_acr_sle.mcas_2" "eular_acr_sle.migraine" "eular_acr_sle.slicc_sle"
[10] "mcas_1.mcas_2" "mcas_1.migraine" "mcas_1.slicc_sle"
[13] "mcas_2.migraine" "mcas_2.slicc_sle" "migraine.slicc_sle"
pairs_entropy_diff <- enframe(pairwise_div_difference(df), name = "pair", value = "entropy_difference") %>%
mutate(entropy_difference = ifelse(entropy_difference >=0, 0-entropy_difference, entropy_difference))
pairs_entropy_diff
read_csv(here("data/all_criteria_null_difference_distributions.csv")) %>%
# pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
ggplot(aes(x = entropy_difference)) +
geom_histogram(binwidth = 0.01)+
geom_vline(data = pairs_entropy_diff, aes(xintercept = entropy_difference), color = "red")+
facet_wrap(~pair, scales = "free")+
theme_classic()
Rows: 150000 Columns: 2── Column specification ─────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): pair
dbl (1): entropy_difference
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Calculate p-values for diversity differences based on the quantile of the
# diversity difference value compared to the null hypothesis diversity distribution values
# Pairwise comparison adjusted
entropy_diff_table <- read_csv(here("data/all_criteria_null_difference_distributions.csv")) %>%
# pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
group_by(pair) %>%
summarise(entropy_difference_distribution = list(entropy_difference)) %>%
right_join(pairs_entropy_diff) %>%
mutate(p_value = map2_dbl(entropy_difference, entropy_difference_distribution, ~ecdf(.y)(.x))) %>%
mutate(p_adj = p.adjust(p_value, method = "bonferroni", n = n()))
Rows: 150000 Columns: 2── Column specification ─────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): pair
dbl (1): entropy_difference
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Joining with `by = join_by(pair)`
entropy_diff_table
# Plot diversity values
broom::tidy(div_diag) %>%
mutate(names = tolower(names)) %>%
ggplot(aes(x=names, y = x))+
geom_bar(stat = "identity") +
ggpubr::stat_pvalue_manual(
entropy_diff_table %>% separate(pair, into = c("group1", "group2"), sep = "\\."),
label= "p_adj",
y.position = 7, step.increase = 0.2
) +
theme_bw() +
labs(x = "", y = "Shannon diversity") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")

Pairwise randomization
pairs_list <- as.list(as.data.frame(combn(unique(df$criteria),2)))
names(pairs_list) <- sapply(pairs_list, paste, collapse = ".")
pairs_list
$mcas_1.mcas_2
[1] "mcas_1" "mcas_2"
$mcas_1.aha_kawasaki
[1] "mcas_1" "aha_kawasaki"
$mcas_1.eular_acr_sle
[1] "mcas_1" "eular_acr_sle"
$mcas_1.migraine
[1] "mcas_1" "migraine"
$mcas_1.slicc_sle
[1] "mcas_1" "slicc_sle"
$mcas_2.aha_kawasaki
[1] "mcas_2" "aha_kawasaki"
$mcas_2.eular_acr_sle
[1] "mcas_2" "eular_acr_sle"
$mcas_2.migraine
[1] "mcas_2" "migraine"
$mcas_2.slicc_sle
[1] "mcas_2" "slicc_sle"
$aha_kawasaki.eular_acr_sle
[1] "aha_kawasaki" "eular_acr_sle"
$aha_kawasaki.migraine
[1] "aha_kawasaki" "migraine"
$aha_kawasaki.slicc_sle
[1] "aha_kawasaki" "slicc_sle"
$eular_acr_sle.migraine
[1] "eular_acr_sle" "migraine"
$eular_acr_sle.slicc_sle
[1] "eular_acr_sle" "slicc_sle"
$migraine.slicc_sle
[1] "migraine" "slicc_sle"
# Create function that subsets df of all diagnoses for a criteria based on a pair of two criteria to compare
# Then randomizes the diagnoses across the two criteria,
# measures the resulting diversity of each criteria's diagnoses
# and calculates a difference in the diversity value
# randomize can be set F to obtain original difference in diversity w/o randomization
# To be used with replicate to generate a null hypothesis distribution
permutation_pair_iteration <- function(pair, randomize = T){
df <- df %>%
filter(criteria %in% pair)
if (randomize == T){
df <- df %>% mutate(diagnosis = sample(diagnosis, n()))
}
div <- vegan::diversity(table(df$criteria, df$diagnosis))
unname(div[1] - div[2])
}
replicate(10,permutation_pair_iteration(pairs_list[[1]], randomize = T))
[1] 0.0049722988 0.0081672943 0.0156799259 0.0038521584 0.0060045361 -0.0151564640
[7] 0.0144935438 -0.0333420796 0.0278905935 -0.0006473075
replicate(10,permutation_pair_iteration(pairs_list[[1]], randomize = F))
[1] -1.434911 -1.434911 -1.434911 -1.434911 -1.434911 -1.434911 -1.434911 -1.434911 -1.434911
[10] -1.434911
# # Create a null hypothesis distribution for the difference in diversity
# # For all pairs of criteria
# set.seed(1234)
# pairs_permutation_distributions <- lapply(pairs_list, function(x) replicate(10000, permutation_pair_iteration(x)))
# names(pairs_permutation_distributions) <- lapply(pairs_list, function(x) paste(x, collapse = "-"))
# data.frame(pairs_permutation_distributions)
# write_csv(data.frame(pairs_permutation_distributions), here("data/criteria_pairwise_null_difference_distributions.csv"))
pairs_permutation_distributions <- read_csv(here("data/criteria_pairwise_null_difference_distributions.csv"))
Rows: 10000 Columns: 15── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (15): mcas_1.mcas_2, mcas_1.aha_kawasaki, mcas_1.eular_acr_sle, mcas_1.migraine, mcas_1.slicc_sle, mcas_2.aha_kawasaki, mcas_2.eular_acr_sle, mcas_2.migraine, mcas_2.slicc_sle, aha_kawasaki.eular_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pairs_permutation_distributions
# Create a df of the actual difference in diversity between pairs of criteria
pairs_entropy_diff <- lapply(pairs_list, permutation_pair_iteration, randomize = F)
names(pairs_entropy_diff) <- lapply(pairs_list, function(x) paste(x, collapse = "-"))
pairs_entropy_diff <- data.frame(pairs_entropy_diff) %>%
pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
mutate(entropy_difference = ifelse(entropy_difference >=0, 0-entropy_difference, entropy_difference))
pairs_entropy_diff
# Create plots of null hypothesis distributions, overlaid with actual diversity difference
read_csv(here("data/all_criteria_pairwise_null_difference_distributions.csv")) %>%
group_by(pair) %>%
nest() %>%
separate(pair, into = c("p1","p2"), sep = "\\.", remove = F) %>%
rowwise() %>% mutate(pair=paste(sort(c(p1,p2)), collapse = ".")) %>%
select(!pair:p2) %>%
unnest(data) %>%
# pivot_longer(cols = everything(), names_to = "pair", values_to = "entropy_difference") %>%
ggplot(aes(x = entropy_difference)) +
geom_histogram(binwidth = 0.01)+
geom_vline(data = pairs_entropy_diff, aes(xintercept = entropy_difference), color = "red")+
facet_wrap(~pair, scales = "free")+
theme_classic()
Rows: 150000 Columns: 2── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): pair
dbl (1): entropy_difference
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Adding missing grouping variables: `pair`

# Calculate p-values for diversity differences based on the quantile of the
# diversity difference value compared to the null hypothesis diversity distribution values
# Pairwise comparison adjusted
data.frame(pairs_permutation_distributions) %>%
pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
group_by(pair) %>%
summarise(entropy_difference_distribution = list(entropy_difference)) %>%
right_join(pairs_entropy_diff) %>%
mutate(p_value = map2_dbl(entropy_difference, entropy_difference_distribution, ~ecdf(.y)(.x))) %>%
mutate(p_adj = p.adjust(p_value, method = "bonferroni", n = n()))
Joining with `by = join_by(pair)`